<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2016-2017 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Installation using Cloudera Manager</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-section-numbering.css" type="text/css" />

    <script type="text/javascript">
      // Global options object for this page. It is declared before jquery.js,
      // underscore.js, doctools.js and language_data.js are loaded below;
      // presumably those scripts read it -- TODO confirm against doctools.js.
      var DOCUMENTATION_OPTIONS = {
        // NOTE(review): empty here, although this page references its static
        // assets through '../' -- confirm that an empty root is intended.
        URL_ROOT:    '',
        // Same value as the "git_release" meta tag in this <head>.
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="Installation" href="index.html" />
    <link rel="next" title="Installation on Amazon EMR using Bootstrap Actions" href="emr.html" />
    <link rel="prev" title="Installation" href="index.html" />
    <!-- block extrahead -->
    <!-- The character encoding is already declared by the Content-Type meta at
         the top of this head element. A document may carry only one encoding
         declaration, so it is not repeated here. -->
    <!-- "IE=edge" alone: the former "chrome=1" token targeted the retired
         Chrome Frame plug-in and is rejected by validators. -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- No maximum-scale: capping the zoom level stops users from enlarging the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <!-- Icon-only toggle for the collapsed navbar: the aria-label supplies the
               accessible name, and the purely decorative bars are hidden from
               assistive technology. -->
          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse" aria-label="Toggle navigation">
            <span class="icon-bar" aria-hidden="true"></span>
            <span class="icon-bar" aria-hidden="true"></span>
            <span class="icon-bar" aria-hidden="true"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <!-- Site search form: submits the query as "q" to search.html via GET.
               role="search" exposes the form as a search landmark, and the aria-label
               names the text field, because a placeholder alone is not a label. -->
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get" role="search">
            <div class="form-group">
              <div class="navbar-search-image material-icons" aria-hidden="true"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: admin-manual -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><b><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cdap-components.html"> CDAP Components</a></li>
<li class="toctree-l1"><a class="reference internal" href="../deployment-architectures.html"> Deployment Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../hadoop-compatibility.html"> Hadoop Compatibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cdap-hadoop-compatibility.html"> CDAP and Hadoop Compatibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="../system-requirements.html"> System Requirements</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> Installation</a><ul class="current">
<li class="toctree-l2 current"><a class="current reference internal" href="#">Cloudera Manager</a></li>
<li class="toctree-l2"><a class="reference internal" href="emr.html">Amazon EMR</a></li>
<li class="toctree-l2"><a class="reference internal" href="ambari.html">Apache Ambari</a></li>
<li class="toctree-l2"><a class="reference internal" href="mapr.html">MapR</a></li>
<li class="toctree-l2"><a class="reference internal" href="azure-hdinsight.html">Microsoft Azure HDInsight</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html">Packages</a></li>
<li class="toctree-l2"><a class="reference internal" href="replication.html">Replication</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../incompatibilities.html"> Incompatibilities</a></li>
<li class="toctree-l1"><a class="reference internal" href="../upgrading/index.html"> Upgrading</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/cloudera.html">Cloudera Manager</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/ambari.html">Apache Ambari</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/mapr.html">MapR</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/packages.html">Packages</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../security/index.html"> Security</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../security/perimeter-security.html">Perimeter Security</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/authorization.html">Authorization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/impersonation.html">Impersonation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/system-services.html">Enabling SSL for System Services</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/secure-storage.html">Secure Storage</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../operations/index.html"> Operations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../operations/logging.html"> Logging and Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/metrics.html"> Metrics</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/operations-dashboard.html"> Dashboard and Reports</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/preferences.html"> Preferences and Runtime Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/scaling-instances.html"> Scaling Instances</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/resource-guarantees.html"> Resource Guarantees in YARN</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/tx-maintenance.html"> Transaction Service Maintenance</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/cdap-ui.html"> CDAP UI</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../appendices/index.html"> Appendices</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../appendices/cdap-site.html"> Appendix: cdap-site.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/cdap-security.html"> Appendix: cdap-security.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/minimal-cdap-site.html"> Appendix: Minimal cdap-site.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/hbase-ddl-executor.html"> Appendix: HBaseDDLExecutor</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="installation-using-cloudera-manager">
<span id="admin-installation-cloudera"></span><h1>Installation using Cloudera Manager<a class="headerlink" href="#installation-using-cloudera-manager" title="Permalink to this headline">🔗</a></h1>
<!-- Step navigation: each image links to a section of this page, so its alt text
     names the destination section. The links are kept on one line because
     whitespace between them would render as gaps between the images. -->
<div class="admin-manual-step-images container">
<a class="reference internal" href="#preparing-the-cluster"><img alt="Step 1: Preparing the Cluster" class="align-top" src="../_images/step-1.png" style="width: 166.0px; height: 90.0px;" /></a><a class="reference internal" href="#downloading-and-distributing-packages"><img alt="Step 2: Downloading and Distributing Packages" class="align-top" src="../_images/step-2.png" style="width: 230.0px; height: 90.0px;" /></a><a class="reference internal" href="#installing-cdap-services"><img alt="Step 3: Installing CDAP Services" class="align-top" src="../_images/step-3.png" style="width: 187.0px; height: 90.0px;" /></a><a class="reference internal" href="#starting-cdap-services"><img alt="Step 4: Starting CDAP Services" class="align-top" src="../_images/step-4.png" style="width: 187.0px; height: 90.0px;" /></a><a class="reference internal" href="#verification"><img alt="Step 5: Verification" class="align-top" src="../_images/step-5.png" style="width: 165.0px; height: 90.0px;" /></a></div>
<p class="rubric">Notes</p>
<p>This section describes installing CDAP on Hadoop clusters managed by Cloudera Manager.</p>
<ul>
<li><p class="first">The CDAP integration with Cloudera Manager is provided in the form of a Custom Service
Descriptor (CSD), which <em>must be installed into Cloudera Manager</em> prior to installing CDAP.  The CSD
contains service definitions and configurations to make Cloudera Manager “CDAP-aware.”</p>
<p>After the CDAP CSD has been <a class="reference internal" href="#cloudera-installation-download"><span class="std std-ref">downloaded and installed</span></a>, the CDAP service
can then be installed via the usual Cloudera Manager methods. CDAP parcels will be available from the
preconfigured CDAP parcel repository, and the CDAP service can be added to a cluster using the
“Add Service” wizard.</p>
<p>A new CDAP CSD is released with each CDAP minor version (for example: 4.0, 4.1, etc.) with patch
releases as needed. The installed CSD version should always match the <code class="docutils literal notranslate"><span class="pre">major.minor</span></code> version of the
CDAP Parcel.  For example, the 6.1 CSD can be used with CDAP 6.1.x.</p>
</li>
<li><p class="first">If you are installing CDAP with the intention of using <em>replication,</em> see these
instructions on <a class="reference internal" href="replication.html#installation-replication"><span class="std std-ref">CDAP Replication</span></a> <em>before</em> installing or starting CDAP.</p>
</li>
</ul>
<div class="section" id="preparing-the-cluster">
<h2>Preparing the Cluster<a class="headerlink" href="#preparing-the-cluster" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="roles-and-dependencies">
<h3>Roles and Dependencies<a class="headerlink" href="#roles-and-dependencies" title="Permalink to this headline">🔗</a></h3>
<p>The CDAP CSD (<a class="reference external" href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cm_mc_addon_services.html#concept_qbv_3jk_bn_unique_1">Custom Service Descriptor</a>)
consists of four mandatory roles and two optional roles:</p>
<table border="1" class="docutils">
<colgroup>
<col width="20%" />
<col width="80%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head">CSD Role</th>
<th class="head">Description</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even"><td>CDAP Master Service</td>
<td>Service for managing runtime, lifecycle and resources of CDAP applications</td>
</tr>
<tr class="row-odd"><td>CDAP Gateway/Router Service</td>
<td>Service supporting REST endpoints for CDAP</td>
</tr>
<tr class="row-even"><td>CDAP Kafka Service</td>
<td>Metrics and logging transport service, using an embedded version of <em>Kafka</em></td>
</tr>
<tr class="row-odd"><td>CDAP UI Service</td>
<td>User interface for managing CDAP applications</td>
</tr>
<tr class="row-even"><td>&#160;</td>
<td>&#160;</td>
</tr>
<tr class="row-odd"><td>CDAP Security Auth Service</td>
<td>Performs client authentication for CDAP when security is enabled (<em>optional</em>)</td>
</tr>
<tr class="row-even"><td>Gateway</td>
<td><a class="reference external" href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cm_mc_managing_roles.html">Cloudera Manager Gateway Role</a>
that installs the CDAP client tools (such as the <em>CDAP CLI</em>) and configuration (<em>optional</em>)</td>
</tr>
</tbody>
</table>
<p>These roles map to the <a class="reference internal" href="../cdap-components.html#admin-manual-cdap-components"><span class="std std-ref">CDAP components</span></a> of the same name.</p>
<ul class="simple">
<li>As CDAP depends on HDFS, YARN, HBase, ZooKeeper, and (optionally) Hive and Spark, it must be installed
on cluster host(s) with full client configurations for these dependent services.</li>
<li>The CDAP Master Service role (or <em>CDAP Master</em>) must be co-located on a cluster host with an HDFS Gateway, a YARN
Gateway, an HBase Gateway, and—optionally—Hive or Spark Gateways.</li>
<li>Note that these Gateways are redundant if you are co-locating the CDAP Master role
on a cluster host (or hosts, in the case of a deployment with high availability) with
actual services, such as the HDFS NameNode, the YARN ResourceManager, or the HBase
Master.</li>
<li>Note that the CDAP Gateway/Router Service is not a <a class="reference external" href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cm_mc_managing_roles.html">Cloudera Manager Gateway Role</a>
but is instead another name for the CDAP Router Service.</li>
<li>CDAP also provides its own Gateway role that can be used to install CDAP client
configurations on other hosts of the cluster.</li>
<li>All services run as the <code class="docutils literal notranslate"><span class="pre">'cdap'</span></code> user installed by the parcel.</li>
</ul>
</div>
<div class="section" id="hadoop-configuration">
<h3>Hadoop Configuration<a class="headerlink" href="#hadoop-configuration" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic">
<li><p class="first">ZooKeeper’s <code class="docutils literal notranslate"><span class="pre">maxClientCnxns</span></code> must be raised from its default.  We suggest setting it to zero
(0: unlimited connections). As each YARN container launched by CDAP makes a connection to ZooKeeper,
the number of connections required is a function of usage.</p>
</li>
<li><p class="first">Ensure that YARN has sufficient memory capacity by lowering the default minimum container
size (controlled by the property <code class="docutils literal notranslate"><span class="pre">yarn.scheduler.minimum-allocation-mb</span></code>). Lack of
YARN memory capacity is the leading cause of apparent failures that we see reported.
We recommend starting with these settings:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.delete.debug-delay-sec</span></code>: 43200 <em>(see note below)</em></li>
<li><code class="docutils literal notranslate"><span class="pre">yarn.scheduler.minimum-allocation-mb</span></code>: 512 MB</li>
</ul>
<p>The value we recommend for <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.delete.debug-delay-sec</span></code> (<code class="docutils literal notranslate"><span class="pre">43200</span></code> or 12
hours) is what we use internally at Cask for testing as that provides adequate time to
capture the logs of any failures. However, you should use an appropriate non-zero value
specific to your environment. A large value can be expensive from a storage perspective.</p>
<p>Please ensure your <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.resource.cpu-vcores</span></code> and
<code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.resource.memory-mb</span></code> settings are set sufficiently to run CDAP,
as described in the <a class="reference internal" href="../system-requirements.html#admin-manual-memory-core-requirements"><span class="std std-ref">CDAP Memory and Core Requirements</span></a>.</p>
</li>
<li><p class="first">Add additional entries to the YARN Application Classpath for Spark jobs.</p>
<p>If you plan on running Spark programs from CDAP, CDAP requires that additional entries be added to
the YARN application classpath, as the Spark installed on Cloudera Manager clusters
is a “Hadoop-less” build and does not include Hadoop jars required by Spark.</p>
<p>To resolve this, go to the CM page for your cluster, click on the YARN service, click on
the configuration tab, and then enter <code class="docutils literal notranslate"><span class="pre">mapreduce.application.classpath</span></code> in the search box.
You will see entries similar to these:</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="n">$HADOOP_MAPRED_HOME</span><span class="o">/*</span>

<span class="n">$HADOOP_MAPRED_HOME</span><span class="o">/</span><span class="n">lib</span><span class="o">/*</span>

<span class="n">$MR2_CLASSPATH</span>
</pre></div>
</div>
<p>Copy all the entries to the <code class="docutils literal notranslate"><span class="pre">yarn.application.classpath</span></code> configuration for YARN on your Cluster.
The <code class="docutils literal notranslate"><span class="pre">yarn.application.classpath</span></code> setting can be found by searching as mentioned above.</p>
<p>Add the required entries by scrolling to the last entry in the classpath form and
clicking the “+” button to add a new text box entry field at the end. Once you have
added all the entries from the <code class="docutils literal notranslate"><span class="pre">mapreduce.application.classpath</span></code> to the
<code class="docutils literal notranslate"><span class="pre">yarn.application.classpath</span></code>, click on <em>Save</em>.</p>
</li>
</ol>
<p>You can make these changes <a class="reference external" href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cm_mc_mod_configs.html">using Cloudera Manager</a>.
Please restart the stale services upon seeing a prompt to do so after making the above
changes.</p>
</div>
<div class="section" id="create-the-cdap-user">
<h3>Create the “cdap” User<a class="headerlink" href="#create-the-cdap-user" title="Permalink to this headline">🔗</a></h3>
<p><strong>The CDAP system user:</strong> As Hadoop resolves users at the NameNode, the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user must
be added there, or name resolution for the user will fail. With Cloudera Manager, the CDAP
installation will create the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user on all nodes when it is distributed or
activated on the cluster.</p>
<p>Note that Cloudera Manager can be configured to not add users specified in an installation.
This can be the case for installations whose IT policies or infrastructure do not allow
local user creation. If this is the case, manual creation of the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user on nodes
may be required.</p>
</div>
<div class="section" id="hdfs-permissions">
<span id="cloudera-hdfs-permissions"></span><h3>HDFS Permissions<a class="headerlink" href="#hdfs-permissions" title="Permalink to this headline">🔗</a></h3>
<p>Ensure YARN is configured properly to run MapReduce programs.  Often, this includes
ensuring that the HDFS <code class="docutils literal notranslate"><span class="pre">/user/yarn</span></code> and <code class="docutils literal notranslate"><span class="pre">/user/cdap</span></code> directories exist with proper
permissions:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> su hdfs
<span class="gp">$</span> hadoop fs -mkdir -p /user/yarn &amp;&amp; hadoop fs -chown yarn:yarn /user/yarn
<span class="gp">$</span> hadoop fs -mkdir -p /user/cdap &amp;&amp; hadoop fs -chown cdap:cdap /user/cdap
</pre>
</div>
</div>
</div>
<div class="section" id="downloading-and-distributing-packages">
<h2>Downloading and Distributing Packages<a class="headerlink" href="#downloading-and-distributing-packages" title="Permalink to this headline">🔗</a></h2>
<p><strong>Note:</strong> Both the <a class="reference internal" href="#cloudera-installation-download"><span class="std std-ref">Custom Service Descriptor (CSD)</span></a>
and the <a class="reference internal" href="#cloudera-installation-download-distribute-parcel"><span class="std std-ref">CDAP Parcel</span></a> must be
downloaded and installed in order to successfully install CDAP.</p>
<div class="section" id="downloading-and-installing-csd">
<span id="cloudera-installation-download"></span><h3>Downloading and Installing CSD<a class="headerlink" href="#downloading-and-installing-csd" title="Permalink to this headline">🔗</a></h3>
<p>To install CDAP on a cluster managed by Cloudera Manager (CM), a Custom Service
Descriptor (CSD) is available that you can install on your CM server. This adds CDAP to the list of
available services that CM can install.</p>
<table border="1" class="docutils" id="cloudera-compatibility-matrix">
<colgroup>
<col width="13%" />
<col width="24%" />
<col width="63%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head" colspan="3">Supported Cloudera Manager (CM) and Cloudera Distribution of Apache Hadoop (CDH) Distributions</th>
</tr>
<tr class="row-even"><th class="head">CM Version</th>
<th class="head">CDH Version</th>
<th class="head">CDAP Parcel / CSD Version</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-odd"><td>5.10</td>
<td>5.9.x through 5.10.x</td>
<td>6.1.x</td>
</tr>
<tr class="row-even"><td>5.10</td>
<td>5.8.x</td>
<td>3.5.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.10</td>
<td>5.7.x</td>
<td>3.4.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.10</td>
<td>5.5.x through 5.6.x</td>
<td>3.3.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.10</td>
<td>5.4.x</td>
<td>3.1.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.10</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td colspan="3">&#160;</td>
</tr>
<tr class="row-even"><td>5.9</td>
<td>5.9.x</td>
<td>6.1.x</td>
</tr>
<tr class="row-odd"><td>5.9</td>
<td>5.8.x</td>
<td>3.5.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.9</td>
<td>5.7.x</td>
<td>3.4.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.9</td>
<td>5.5.x through 5.6.x</td>
<td>3.3.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.9</td>
<td>5.4.x</td>
<td>3.1.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.9</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 6.1.x</td>
</tr>
<tr class="row-even"><td colspan="3">&#160;</td>
</tr>
<tr class="row-odd"><td>5.8</td>
<td>5.8.x</td>
<td>3.5.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.8</td>
<td>5.7.x</td>
<td>3.4.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.8</td>
<td>5.5.x through 5.6.x</td>
<td>3.3.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.8</td>
<td>5.4.x</td>
<td>3.1.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.8</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 6.1.x</td>
</tr>
<tr class="row-even"><td colspan="3">&#160;</td>
</tr>
<tr class="row-odd"><td>5.7</td>
<td>5.7.x</td>
<td>3.4.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.7</td>
<td>5.5.x through 5.6.x</td>
<td>3.3.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td>5.7</td>
<td>5.4.x</td>
<td>3.1.x through 6.1.x</td>
</tr>
<tr class="row-even"><td>5.7</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 6.1.x</td>
</tr>
<tr class="row-odd"><td colspan="3">&#160;</td>
</tr>
<tr class="row-even"><td>5.6</td>
<td>5.5.x through 5.6.x</td>
<td>3.3.x through 3.6.x</td>
</tr>
<tr class="row-odd"><td>5.6</td>
<td>5.4.x</td>
<td>3.1.x through 3.6.x</td>
</tr>
<tr class="row-even"><td>5.6</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 3.6.x</td>
</tr>
<tr class="row-odd"><td colspan="3">&#160;</td>
</tr>
<tr class="row-even"><td>5.5</td>
<td>5.5.x</td>
<td>3.3.x through 3.6.x</td>
</tr>
<tr class="row-odd"><td>5.5</td>
<td>5.4.x</td>
<td>3.1.x through 3.6.x</td>
</tr>
<tr class="row-even"><td>5.5</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 3.6.x</td>
</tr>
<tr class="row-odd"><td colspan="3">&#160;</td>
</tr>
<tr class="row-even"><td>5.4</td>
<td>5.4.x</td>
<td>3.1.x through 3.6.x</td>
</tr>
<tr class="row-odd"><td>5.4</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 3.6.x</td>
</tr>
<tr class="row-even"><td colspan="3">&#160;</td>
</tr>
<tr class="row-odd"><td>5.3</td>
<td>no greater than 5.3.x</td>
<td>3.0.x through 3.1.x</td>
</tr>
<tr class="row-even"><td>5.2</td>
<td>no greater than 5.2.x</td>
<td>3.0.x through 3.1.x</td>
</tr>
<tr class="row-odd"><td>5.1</td>
<td>no greater than 5.1.x</td>
<td><em>Not supported</em></td>
</tr>
</tbody>
</table>
<p id="cloudera-compatibility-matrix-end"><strong>Notes:</strong></p>
<ul class="simple">
<li>Cloudera Manager supports <a class="reference external" href="http://www.cloudera.com/documentation/enterprise/latest/topics/pcm_cdh_cm.html">a version of CDH no greater than its own</a>
(for example, CM version 5.1 supports CDH versions less than or equal to 5.1).</li>
<li>The version of the CDAP Parcel that is used should match the CSD major.minor version.</li>
</ul>
<p><strong>Steps:</strong></p>
<ol class="arabic simple">
<li>Download the CDAP CSD by <a class="reference external" href="http://cask.co/downloads/#cloudera">downloading the JAR file</a>.
Details on CSDs and Cloudera Manager Extensions are <a class="reference external" href="https://github.com/cloudera/cm_ext/wiki">available online</a>.</li>
<li id="cloudera-installation-csd">Install the CSD following the instructions at Cloudera’s website on <a class="reference external" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cm_mc_addon_services.html">Add-on Services</a>,
using the instructions given for the case of installing software in the form of a parcel.
In this case, you install the CSD first and then install the parcel second.</li>
<li>The first time the CDAP CSD is installed, the Cloudera Management Service may prompt
to be restarted. This is necessary for the CDAP services to be properly monitored.</li>
</ol>
</div>
<div class="section" id="downloading-and-installing-parcels">
<span id="cloudera-installation-download-distribute-parcel"></span><h3>Downloading and Installing Parcels<a class="headerlink" href="#downloading-and-installing-parcels" title="Permalink to this headline">🔗</a></h3>
<p>Download and distribute the CDAP-6.1.1 parcel. Complete instructions on parcels are
available at <a class="reference external" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cm_ig_parcels.html">Cloudera’s website</a>, but in summary these are the steps:</p>
<ol class="arabic simple">
<li>Installing the CSD adds the corresponding Cask parcel repository for you; however, you can
<a class="reference external" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cm_ig_parcels.html#cmug_topic_7_11_5_unique_1">customize the list of repositories</a>
searched by Cloudera Manager if you need to;</li>
<li><a class="reference external" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cm_ig_parcels.html#concept_vwq_421_yk_unique_1__section_cnx_b3y_bm_unique_1">Download</a>
the parcel to your Cloudera Manager server;</li>
<li><a class="reference external" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cm_ig_parcels.html#concept_vwq_421_yk_unique_1__section_sty_b3y_bm_unique_1">Distribute</a>
the parcel to all the servers in your cluster; and</li>
<li><a class="reference external" href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cm_ig_parcels.html#concept_vwq_421_yk_unique_1__section_ug1_c3y_bm_unique_1">Activate</a>
the parcel.</li>
</ol>
<div class="figure align-center" id="id5" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-parcels.png"><img alt="../_images/cloudera-parcels.png" class="bordered-image" src="../_images/cloudera-parcels.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cloudera Manager:</strong> CDAP Parcels <em>Distributed, Activated</em> on a cluster.</span></p>
</div>
<p><strong>Notes:</strong></p>
<ul>
<li><p class="first">If the Cask parcel repository is inaccessible to your cluster, please see <span class="xref std std-ref">these
suggestions</span>.</p>
</li>
<li><p class="first">The CDAP parcels are hosted at a repository determined by the CDAP version.
For instance, the CDAP 6.1 parcel metadata is accessed by Cloudera Manager at
<a class="reference external" href="https://repository.cask.co/parcels/cdap/6.1/manifest.json">this URL:</a></p>
<pre class="literal-block">
https://repository.cask.co/parcels/cdap/6.1/manifest.json
</pre>
</li>
</ul>
</div>
</div>
<div class="section" id="installing-cdap-services">
<h2>Installing CDAP Services<a class="headerlink" href="#installing-cdap-services" title="Permalink to this headline">🔗</a></h2>
<p>These instructions show how to use the Cloudera Manager Admin Console <em>Add Service</em> Wizard
to install and start CDAP. Note that the screens of the wizard will vary depending on
which version of Cloudera Manager and CDAP you are using.</p>
<div class="section" id="add-cdap-service">
<span id="cloudera-add-a-service"></span><h3>Add CDAP Service<a class="headerlink" href="#add-cdap-service" title="Permalink to this headline">🔗</a></h3>
<p>Start from the Cloudera Manager Admin Console’s <em>Home</em> page, selecting <em>Add Service</em> from the menu for your cluster:</p>
<div class="figure align-center" id="id6" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-01.png"><img alt="../_images/cloudera-csd-01.png" class="bordered-image" src="../_images/cloudera-csd-01.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cloudera Manager:</strong> Starting the <em>Add Service</em> Wizard.</span></p>
</div>
</div>
<div class="section" id="add-service-wizard-selecting-cdap">
<span id="cloudera-add-service-wizard"></span><h3>Add Service Wizard: Selecting CDAP<a class="headerlink" href="#add-service-wizard-selecting-cdap" title="Permalink to this headline">🔗</a></h3>
<p>Use the <em>Add Service</em> Wizard and select <em>CDAP</em>.</p>
<div class="figure align-center" id="id7" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-02.png"><img alt="../_images/cloudera-csd-02.png" class="bordered-image" src="../_images/cloudera-csd-02.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard:</strong> Selecting CDAP as the service to be added.</span></p>
</div>
</div>
<div class="section" id="add-service-wizard-specifying-dependencies">
<h3>Add Service Wizard: Specifying Dependencies<a class="headerlink" href="#add-service-wizard-specifying-dependencies" title="Permalink to this headline">🔗</a></h3>
<p>The <strong>Hive dependency</strong> is for the CDAP “Explore” component, which is enabled by default.
Note that if you do not select Hive, you will need to disable CDAP Explore in a later page
when you review these changes.</p>
<div class="figure align-center" id="id8" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-03.png"><img alt="../_images/cloudera-csd-03.png" class="bordered-image" src="../_images/cloudera-csd-03.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 1:</strong> Setting the dependencies (in this case, including Hive).</span></p>
</div>
</div>
<div class="section" id="add-service-wizard-customize-role-assignments">
<h3>Add Service Wizard: Customize Role Assignments<a class="headerlink" href="#add-service-wizard-customize-role-assignments" title="Permalink to this headline">🔗</a></h3>
<p><strong>Customize Role Assignments:</strong> Ensure the CDAP Master role is assigned to hosts colocated
with service <em>or</em> gateway roles for HBase, HDFS, YARN, and (optionally) Hive and Spark.</p>
<div class="figure align-center" id="id9" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-04.png"><img alt="../_images/cloudera-csd-04.png" class="bordered-image" src="../_images/cloudera-csd-04.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 2:</strong> When customizing Role Assignments, the <em>CDAP Security
Auth Service</em> can be added later, if required.</span></p>
</div>
</div>
<div class="section" id="id1">
<h3>Add Service Wizard: Customize Role Assignments<a class="headerlink" href="#id1" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id10" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-04b.png"><img alt="../_images/cloudera-csd-04b.png" class="bordered-image" src="../_images/cloudera-csd-04b.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 2 (dialog):</strong> Assigning the CDAP Master Role to a host with the
HBase, HDFS, YARN, Hive, and Spark Gateway roles. It could also be on a host with
running services instead.</span></p>
</div>
</div>
<div class="section" id="id2">
<h3>Add Service Wizard: Customize Role Assignments<a class="headerlink" href="#id2" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id11" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-04c.png"><img alt="../_images/cloudera-csd-04c.png" class="bordered-image" src="../_images/cloudera-csd-04c.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 2 (dialog):</strong> Completing assignments with the CDAP Gateway client
added to other nodes of the cluster; it can be added to nodes with CDAP roles.</span></p>
</div>
</div>
<div class="section" id="id3">
<h3>Add Service Wizard: Customize Role Assignments<a class="headerlink" href="#id3" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id12" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-05.png"><img alt="../_images/cloudera-csd-05.png" class="bordered-image" src="../_images/cloudera-csd-05.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 2:</strong> Completed role assignments.</span></p>
</div>
</div>
<div class="section" id="add-service-wizard-reviewing-configuration">
<span id="cloudera-installation-add-service-wizard-configuration"></span><h3>Add Service Wizard: Reviewing Configuration<a class="headerlink" href="#add-service-wizard-reviewing-configuration" title="Permalink to this headline">🔗</a></h3>
<p><strong>App Artifact Dir:</strong> This should initially point to the bundled system artifacts included
in the CDAP parcel directory. If you have modified <code class="docutils literal notranslate"><span class="pre">${PARCELS_ROOT}</span></code> for your instance
of Cloudera Manager, please update this setting (<em>App Artifact Dir</em>) to match. You may
want to customize this directory to a location outside of the CDAP Parcel.</p>
<p><strong>Explore Enabled:</strong> This needs to be disabled if you <strong>didn’t</strong> select Hive earlier.</p>
<p><strong>Kerberos Auth Enabled:</strong> This is needed if running on a secure Hadoop cluster.</p>
<p><strong>Router Bind Port, Router Server Port:</strong> These two ports should match; <em>Router Server
Port</em> is used by the CDAP UI to connect to the CDAP Router service.</p>
<div class="figure align-center" id="id13" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-06.png"><img alt="../_images/cloudera-csd-06.png" class="bordered-image" src="../_images/cloudera-csd-06.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 4:</strong> Reviewing changes and (initial) configuration.</span></p>
</div>
<p><strong>Additional CDAP configuration properties</strong> can be added using Cloudera Manager’s <em>Safety
Valve Advanced Configuration Snippets.</em> Documentation of the available CDAP properties is
in the <a class="reference internal" href="../appendices/cdap-site.html#appendix-cdap-site-xml"><span class="std std-ref">Appendix: cdap-site.xml, cdap-default.xml</span></a>. Note that for certain CDAP properties, the default
values for Cloudera may differ from those shown in the above appendix:</p>
<ul class="simple">
<li>For <code class="docutils literal notranslate"><span class="pre">kafka.server.log.dirs</span></code>, the default value is <code class="docutils literal notranslate"><span class="pre">{$LOCAL_DIR/kafka-logs}</span></code> or
<code class="docutils literal notranslate"><span class="pre">/var/tmp/cdap/kafka-logs</span></code>, instead of <code class="docutils literal notranslate"><span class="pre">/tmp/kafka-logs</span></code> as shown in the
<a class="reference internal" href="../appendices/cdap-site.html#appendix-cdap-default-kafka-server"><span class="std std-ref">Appendix: Kafka Server</span></a>.</li>
</ul>
<p><strong>Additional environment variables</strong> can be set, as required, using Cloudera Manager’s
<em>CDAP Service Environment Advanced Configuration Snippet (Safety Valve).</em> See the example below for
<a class="reference internal" href="#cloudera-configuring-spark"><span class="std std-ref">configuring Spark</span></a>.</p>
<p><strong>Note:</strong> Service-specific Java heap memory settings (that override the default values)
can be created by setting these environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">AUTH_JAVA_HEAPMAX</span>
<span class="go">KAFKA_JAVA_HEAPMAX</span>
<span class="go">MASTER_JAVA_HEAPMAX</span>
<span class="go">ROUTER_JAVA_HEAPMAX</span>
</pre></div>
</div>
<p><strong>At this point, the CDAP installation is configured</strong> and is ready to be installed. Review
your settings before continuing to the next step, which will install and start CDAP.</p>
</div>
</div>
<div class="section" id="starting-cdap-services">
<span id="cloudera-starting-services"></span><h2>Starting CDAP Services<a class="headerlink" href="#starting-cdap-services" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="add-service-wizard-first-run-of-commands">
<h3>Add Service Wizard: First Run of Commands<a class="headerlink" href="#add-service-wizard-first-run-of-commands" title="Permalink to this headline">🔗</a></h3>
<p>The wizard executes the commands that install and automatically start the CDAP services.</p>
<div class="figure align-center" id="id14" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-07.png"><img alt="../_images/cloudera-csd-07.png" class="bordered-image" src="../_images/cloudera-csd-07.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 5:</strong> Finishing first run of commands to install and start CDAP.</span></p>
</div>
</div>
<div class="section" id="add-service-wizard-completion-page">
<h3>Add Service Wizard: Completion Page<a class="headerlink" href="#add-service-wizard-completion-page" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id15" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-08.png"><img alt="../_images/cloudera-csd-08.png" class="bordered-image" src="../_images/cloudera-csd-08.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Add Service Wizard, Page 6:</strong> Congratulations screen, though there is still work to be done.</span></p>
</div>
</div>
<div class="section" id="cluster-home-page-status-tab">
<h3>Cluster Home Page: Status Tab<a class="headerlink" href="#cluster-home-page-status-tab" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id16" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-09a.png"><img alt="../_images/cloudera-csd-09a.png" class="bordered-image" src="../_images/cloudera-csd-09a.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cluster Home Page, Status Tab:</strong> Showing all CDAP services running. <em>Gateway</em> is not an actual service.</span></p>
</div>
<div class="figure align-center" id="id17" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-09b.png"><img alt="../_images/cloudera-csd-09b.png" class="bordered-image" src="../_images/cloudera-csd-09b.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cloudera Manager Home Page:</strong> Showing CDAP installed on the cluster as a service.</span></p>
</div>
</div>
<div class="section" id="cluster-home-page-configuring-for-spark">
<span id="cloudera-configuring-spark"></span><h3>Cluster Home Page: Configuring for Spark<a class="headerlink" href="#cluster-home-page-configuring-for-spark" title="Permalink to this headline">🔗</a></h3>
<p><strong>Including Spark:</strong> If your cluster contains both Spark1 and Spark2, and you would like to use Spark2,
the <em>Environment Advanced Configuration</em> needs to contain the Spark version to use as
<code class="docutils literal notranslate"><span class="pre">SPARK_MAJOR_VERSION=2</span></code>. If you only have one version of Spark installed, CDAP will use that version.</p>
<p><strong>Additional environment variables</strong> are set using the Cloudera Manager’s
“CDAP Service Environment Advanced Configuration Snippet (Safety Valve)”.</p>
</div>
<div class="section" id="id4">
<h3>Cluster Home Page: Configuring for Spark<a class="headerlink" href="#id4" title="Permalink to this headline">🔗</a></h3>
<p>You will then have a stale configuration and need to restart the CDAP services.</p>
<div class="figure align-center" id="id18" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-11.png"><img alt="../_images/cloudera-csd-11.png" class="bordered-image" src="../_images/cloudera-csd-11.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cluster Home Page, Status Tab:</strong> Stale configuration that requires restarting.</span></p>
</div>
</div>
<div class="section" id="cluster-home-page-restarting-cdap">
<h3>Cluster Home Page: Restarting CDAP<a class="headerlink" href="#cluster-home-page-restarting-cdap" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id19" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-12.1.png"><img alt="../_images/cloudera-csd-12.1.png" class="bordered-image" src="../_images/cloudera-csd-12.1.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cluster Stale Configurations:</strong> Restarting CDAP services.</span></p>
</div>
<div class="figure align-center" id="id20" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-12.2.png"><img alt="../_images/cloudera-csd-12.2.png" class="bordered-image" src="../_images/cloudera-csd-12.2.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cluster Stale Configurations:</strong> Restarting CDAP services.</span></p>
</div>
</div>
<div class="section" id="cluster-home-page-cdap-services-restarted">
<h3>Cluster Home Page: CDAP Services Restarted<a class="headerlink" href="#cluster-home-page-cdap-services-restarted" title="Permalink to this headline">🔗</a></h3>
<div class="figure align-center" id="id21" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-09a.png"><img alt="../_images/cloudera-csd-09a.png" class="bordered-image" src="../_images/cloudera-csd-09a.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cluster Stale Configurations:</strong> CDAP services after restart.</span></p>
</div>
</div>
</div>
<div class="section" id="verification">
<span id="cloudera-verification"></span><h2>Verification<a class="headerlink" href="#verification" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="service-checks-in-cloudera-manager">
<h3>Service Checks in Cloudera Manager<a class="headerlink" href="#service-checks-in-cloudera-manager" title="Permalink to this headline">🔗</a></h3>
<p>After the Cloudera Manager Admin Console’s <em>Add Service</em> Wizard completes, <em>CDAP</em> will
show in your cluster’s list of services.</p>
<div class="figure align-center" id="id22" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-09b.png"><img alt="../_images/cloudera-csd-09b.png" class="bordered-image" src="../_images/cloudera-csd-09b.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cloudera Manager:</strong> CDAP added to the cluster.</span></p>
</div>
<p>You can select it, and go to the <em>CDAP</em> page, with <em>Quick Links</em> and <em>Status Summary</em>. The
lights of the <em>Status Summary</em> should all turn green, showing completion of startup.
(<strong>Note:</strong> <em>Gateway</em> is not an actual service, and does not show a green status indicator.)</p>
<p>The <em>Quick Links</em> includes a link to the <strong>CDAP UI</strong>, which by default is running on
port <code class="docutils literal notranslate"><span class="pre">11011</span></code> of the host where the UI role instance is running.</p>
<div class="figure align-center" id="id23" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/cloudera-csd-09a.png"><img alt="../_images/cloudera-csd-09a.png" class="bordered-image" src="../_images/cloudera-csd-09a.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Cloudera Manager:</strong> CDAP page showing available services and their status.</span></p>
</div>
</div>
<div class="section" id="cdap-smoke-test">
<span id="cloudera-cdap-ui"></span><h3>CDAP Smoke Test<a class="headerlink" href="#cdap-smoke-test" title="Permalink to this headline">🔗</a></h3>
<p>The CDAP UI may initially show errors while all of the CDAP YARN containers are
starting up. Allow up to a few minutes for this.</p>
<p>The <em>Administration</em> page of the CDAP UI shows the status of the CDAP services.
It can be reached at <code class="docutils literal notranslate"><span class="pre">http://&lt;cdap-host&gt;:11011/cdap/administration</span></code>, substituting for
<code class="docutils literal notranslate"><span class="pre">&lt;cdap-host&gt;</span></code> the host name or IP address of the CDAP server:</p>
<div class="figure align-center" id="id24" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/console-distributed.png"><img alt="../_images/console-distributed.png" class="bordered-image" src="../_images/console-distributed.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>CDAP UI:</strong> Showing the <em>Administration</em> page after startup.</span></p>
</div>
</div>
</div>
<div class="section" id="advanced-topics">
<span id="cloudera-installation-advanced-topics"></span><h2>Advanced Topics<a class="headerlink" href="#advanced-topics" title="Permalink to this headline">🔗</a></h2>
<ul class="simple">
<li><a class="reference internal" href="#cloudera-configuration-security"><span class="std std-ref">Enabling Security</span></a></li>
<li><a class="reference internal" href="#cloudera-configuration-enabling-kerberos"><span class="std std-ref">Enabling Kerberos</span></a></li>
<li><a class="reference internal" href="#cloudera-configuration-highly-available"><span class="std std-ref">Enabling CDAP High Availability</span></a></li>
<li><a class="reference internal" href="#cloudera-configuration-enabling-hive-execution-engines"><span class="std std-ref">Enabling Hive Execution Engines</span></a></li>
<li><a class="reference internal" href="#cloudera-configuration-enabling-spark2"><span class="std std-ref">Enabling Spark2</span></a></li>
</ul>
<span class="target" id="cloudera-configuration-security"></span><div class="section" id="enabling-security">
<h3>Enabling Security<a class="headerlink" href="#enabling-security" title="Permalink to this headline">🔗</a></h3>
<p>Cask Data Application Platform (CDAP) supports securing clusters using perimeter security, authorization,
impersonation and secure storage.</p>
<p>Network (or cluster) perimeter security limits outside access, providing a first level of
security. However, perimeter security itself does not provide the safeguards of authentication,
authorization and service request management that a secure Hadoop cluster provides.</p>
<p>Authorization provides a way of enforcing access control on CDAP entities.</p>
<p>Impersonation ensures that programs inside CDAP are run as configured users at the namespace level. When enabled, it
guarantees that all actions on datasets, streams and other resources happen as the configured user.</p>
<p>We recommend that in order for CDAP to be secure, CDAP security should always be used in conjunction with
<a class="reference external" href="http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html">secure Hadoop clusters</a>.
In cases where secure Hadoop is not or cannot be used, the cluster is inherently insecure and any applications
running on the cluster are effectively “trusted”. Although there is still value in having perimeter security,
authorization enforcement and secure storage in that situation, whenever possible a secure Hadoop
cluster should be employed with CDAP security.</p>
<p>For instructions on enabling CDAP Security, see <a class="reference internal" href="../security/index.html#admin-security"><span class="std std-ref">CDAP Security</span></a>.</p>
</div>
<div class="section" id="enabling-kerberos">
<span id="cloudera-configuration-enabling-kerberos"></span><h3>Enabling Kerberos<a class="headerlink" href="#enabling-kerberos" title="Permalink to this headline">🔗</a></h3>
<p>For Kerberos-enabled Hadoop clusters:</p>
<ul>
<li><p class="first">The <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user needs to be granted HBase permissions to create tables.
As the <code class="docutils literal notranslate"><span class="pre">hbase</span></code> user, issue the command:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> <span class="nb">echo</span> <span class="s2">&quot;grant &#39;cdap&#39;, &#39;RWCA&#39;&quot;</span> <span class="p">|</span> hbase shell
</pre></div>
</div>
</li>
<li><p class="first">The <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user must be able to launch YARN containers, either by adding it to the YARN
<code class="docutils literal notranslate"><span class="pre">allowed.system.users</span></code> or by adjusting the YARN <code class="docutils literal notranslate"><span class="pre">min.user.id</span></code> to include the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user.
(Search for the YARN configuration <code class="docutils literal notranslate"><span class="pre">allowed.system.users</span></code> in Cloudera Manager, and then add
the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user to the whitelist.)</p>
</li>
<li><p class="first">If you are converting an existing CDAP cluster to being Kerberos-enabled, then you may
run into YARN usercache directory permission problems. A non-Kerberos cluster with
default settings will run CDAP containers as the user <code class="docutils literal notranslate"><span class="pre">yarn</span></code>. A Kerberos cluster will
run them as the user <code class="docutils literal notranslate"><span class="pre">cdap</span></code>. When converting, the usercache directory that YARN
creates will already exist and be owned by a different user. On all datanodes, run this
command, substituting in the correct value of the YARN parameter <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.local-dirs</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> rm -rf &lt;YARN.NODEMANAGER.LOCAL-DIRS&gt;/usercache/cdap
</pre></div>
</div>
<p>(As <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.local-dirs</span></code> can be a comma-separated list of directories, you may
need to run this command multiple times, once for each entry.)</p>
<p>If, for example, the setting for <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.local-dirs</span></code> is <code class="docutils literal notranslate"><span class="pre">/yarn/nm</span></code>, you would use:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> rm -rf /yarn/nm/usercache/cdap
</pre></div>
</div>
<p>Restart CDAP after removing the usercache(s).</p>
</li>
</ul>
</div>
<div class="section" id="enabling-sentry">
<span id="cloudera-configuration-integration-sentry"></span><h3>Enabling Sentry<a class="headerlink" href="#enabling-sentry" title="Permalink to this headline">🔗</a></h3>
<p>To use CDAP with Cloudera clusters using Sentry authorization, refer to the steps at
<span class="xref std std-ref">Apache Sentry Configuration</span>.</p>
<p>The properties described there can be set from within Cloudera Manager by searching for them in the
configuration for each component; particularly, Sentry and Hive.</p>
</div>
<div class="section" id="enabling-cdap-ha">
<span id="cloudera-configuration-highly-available"></span><h3>Enabling CDAP HA<a class="headerlink" href="#enabling-cdap-ha" title="Permalink to this headline">🔗</a></h3>
<p>In addition to having a <a class="reference internal" href="../deployment-architectures.html#admin-manual-install-deployment-architectures-ha"><span class="std std-ref">cluster architecture</span></a>
that supports HA (high availability), these additional configuration steps need to be followed and completed:</p>
<div class="section" id="cdap-components">
<h4>CDAP Components<a class="headerlink" href="#cdap-components" title="Permalink to this headline">🔗</a></h4>
<p>For each of the CDAP components listed below (Master, Router, Kafka, UI, Authentication Server), these
comments apply:</p>
<ul class="simple">
<li>Sync the configuration files (such as <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> and <code class="docutils literal notranslate"><span class="pre">cdap-security.xml</span></code>) on all the nodes.</li>
<li>While the default <em>bind.address</em> settings (<code class="docutils literal notranslate"><span class="pre">0.0.0.0</span></code>, used for <code class="docutils literal notranslate"><span class="pre">app.bind.address</span></code>,
<code class="docutils literal notranslate"><span class="pre">data.tx.bind.address</span></code>, <code class="docutils literal notranslate"><span class="pre">router.bind.address</span></code>, and so on) can be synced across hosts,
if you customize them to a particular IP address, they will—as a result—be
different on different hosts. This can be controlled by the settings for an individual <em>Role Instance</em>.</li>
</ul>
</div>
<div class="section" id="cdap-master">
<h4>CDAP Master<a class="headerlink" href="#cdap-master" title="Permalink to this headline">🔗</a></h4>
<p>The CDAP Master service primarily performs coordination tasks and can be scaled for redundancy. The
instances coordinate amongst themselves, electing one as a leader at all times.</p>
<ul class="simple">
<li>Using the Cloudera Manager UI, add additional <em>Role Instances</em> of the role type <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Master</span>
<span class="pre">Service</span></code> to additional machines.</li>
<li>Ensure each machine has all required Gateway roles.</li>
<li>Start each <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Master</span> <span class="pre">Service</span></code> role.</li>
</ul>
</div>
<div class="section" id="cdap-router">
<h4>CDAP Router<a class="headerlink" href="#cdap-router" title="Permalink to this headline">🔗</a></h4>
<p>The CDAP Router service is a stateless API endpoint for CDAP, and simply routes requests to the
appropriate service. It can be scaled horizontally for performance. A load balancer, if
desired, can be placed in front of the nodes running the service.</p>
<ul class="simple">
<li>Using the Cloudera Manager UI, add <em>Role Instances</em> of the role type <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Gateway/Router</span>
<span class="pre">Service</span></code> to additional machines.</li>
<li>Start each <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Gateway/Router</span> <span class="pre">Service</span></code> role.</li>
</ul>
</div>
<div class="section" id="cdap-kafka">
<h4>CDAP Kafka<a class="headerlink" href="#cdap-kafka" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Using the Cloudera Manager UI, add <em>Role Instances</em> of the role type <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Kafka</span> <span class="pre">Service</span></code>
to additional machines.</li>
<li>Two properties govern the Kafka setting in the cluster:<ul>
<li>The <strong>list of Kafka seed brokers</strong> is generated automatically, but the
replication factor (<code class="docutils literal notranslate"><span class="pre">kafka.server.default.replication.factor</span></code>) is not set
automatically. Instead, it needs to be set manually.</li>
<li>The <strong>replication factor</strong> is used to replicate Kafka messages across
multiple machines to prevent data loss in the event of a hardware
failure.</li>
</ul>
</li>
<li>The recommended setting is to run <strong>at least two</strong> Kafka brokers with a <strong>minimum replication
factor of two</strong>; set this property to the maximum number of tolerated machine failures
plus one (assuming you have that number of machines). For example, if you were running
five Kafka brokers, and would tolerate two of those failing, you would set the
replication factor to three. The number of Kafka brokers listed should always be equal to
or greater than the replication factor.</li>
<li>Start each <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Kafka</span> <span class="pre">Service</span></code> role.</li>
</ul>
</div>
<div class="section" id="cdap-ui">
<h4>CDAP UI<a class="headerlink" href="#cdap-ui" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Using the Cloudera Manager UI, add <em>Role Instances</em> of the role type <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">UI</span> <span class="pre">Service</span></code>
to additional machines.</li>
<li>For Cloudera Manager, the CDAP UI and the CDAP Router currently need to be colocated on
the same node.</li>
<li>Start each <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">UI</span> <span class="pre">Service</span></code> role.</li>
</ul>
</div>
<div class="section" id="cdap-authentication-server">
<h4>CDAP Authentication Server<a class="headerlink" href="#cdap-authentication-server" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Using the Cloudera Manager UI, add <em>Role Instances</em> of the role type <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Security</span> <span class="pre">Auth</span>
<span class="pre">Service</span></code> (the CDAP Authentication Server) to additional machines.</li>
<li>Start each <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Security</span> <span class="pre">Auth</span> <span class="pre">Service</span></code> role.</li>
<li>Note that when an unauthenticated request is made in a secure HA setup, a list of all
running authentication endpoints will be returned in the body of the response.</li>
</ul>
</div>
</div>
<div class="section" id="hive-execution-engines">
<span id="cloudera-configuration-enabling-hive-execution-engines"></span><h3>Hive Execution Engines<a class="headerlink" href="#hive-execution-engines" title="Permalink to this headline">🔗</a></h3>
<p>CDAP Explore has support for additional execution engines such as
<a class="reference external" href="http://spark.apache.org/">Apache Spark</a> and
<a class="reference external" href="http://tez.apache.org/">Apache Tez</a>. Details on specifying these engines and
configuring CDAP are in the Developer Manual section on Data Exploration,
<a class="reference external" href="../../../developer-manual/data-exploration/hive-execution-engines.html#hive-ee" title="(in Cask Data Application Platform v6.1.1)"><span class="xref std std-ref">Hive Execution Engines</span></a>.</p>
</div>
<div class="section" id="enabling-spark2">
<span id="cloudera-configuration-enabling-spark2"></span><h3>Enabling Spark2<a class="headerlink" href="#enabling-spark2" title="Permalink to this headline">🔗</a></h3>
<p>In order to use Spark2, you must first install Spark2 on your cluster. If both Spark1
and Spark2 are installed, you must set SPARK_MAJOR_VERSION to 2 in cdap-env.
In addition, you must set Spark2 as a service dependency of CDAP. This can be done
in the Configuration section of CDAP, by searching for ‘dependency’.</p>
<p>You can verify that Spark2 is being used by CDAP by looking at stdout of the CDAP master.
As the master is starting up, you should see a line with ‘SPARK_COMPAT=spark2_2.11’.</p>
<p>When Spark2 is in use, Spark1 programs cannot be run. Similarly, when Spark1 is in use,
Spark2 programs cannot be run.</p>
<p>When CDAP starts up, it detects the Spark version and uploads the corresponding pipeline
system artifact. If you have already started CDAP with Spark1,
you will also need to delete the pipeline system artifacts, then reload them in order
to use the Spark2 versions. After CDAP has been restarted with Spark2, use the RESTful API:</p>
<pre class="literal-block">
<span class="gp">$</span> DELETE /v3/namespaces/system/artifacts/cdap-data-pipeline/versions/6.1.1
<span class="gp">$</span> DELETE /v3/namespaces/system/artifacts/cdap-data-streams/versions/6.1.1
<span class="gp">$</span> POST /v3/namespaces/system/artifacts
</pre>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Installation" href="index.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Installation on Amazon EMR using Bootstrap Actions" href="emr.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>